/*
 Copyright 2013--Present JMM_PROGNAME
 
 This file is distributed under the terms of the JMM_PROGNAME License.
 
 You should have received a copy of the JMM_PROGNAME License.
 If not, see <JMM_PROGNAME WEBSITE>.
*/
// CREATED    : 7/19/2015
// LAST UPDATE: 9/25/2015

// These subroutines optimize the hyperparameters of/for a Gaussian process; currently (7/25/2015) these include the hyperparameters of the covariance function + the noise in the data. On input and output, these hyperparameters are passed separately; internally though, they are combined into a single vector. 

// STL
#include <memory>    // std::unique_ptr<>
//#include <stdexcept> // std::runtime_error()
//#include <tuple>     // std::tie()
#include <utility>   // std::pair<>, std::make_pair()
#include <vector>    // std::vector<>

// jScience
#include "jScience/linalg/Matrix.hpp" // Matrix<>
#include "jScience/linalg/Vector.hpp" // Vector<>
#include "jScience/math/utility.hpp"  // ngroups()
#include "jminimization.h"            // sim_anneal()

// stats++
#include "statsxx/optimization/gradient_based/RPROP.hpp" // RPROP
#include "statsxx/optimization/line_search_methods/ConjugateGradient.hpp" // ConjugateGradient
#include "statsxx/statistics/CovarianceFunction.hpp" // CovarianceFunction


static double fwrapv(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const std::vector<double> &x, const int n);

static double fwrapV(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &x, const int n);
static Vector<double> dfwrapV(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &x, const int n);

// wrapper functions for f(x) and f'(x)
static double f(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &theta);
static Vector<double> df(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &theta);

std::pair<std::vector<double>,
          double> convert(const Vector<double> &x);


//========================================================================
//========================================================================
//
// NAME: void GaussianProcess::train(const Matrix<double> &XX, const Vector<double> &yy)
//
// DESC: (de-)Normalizes data to lie in the range [0,1].
//
//========================================================================
//========================================================================
std::pair<std::vector<double>,
          double> Gaussian_process::optimize_hyperparameters_CV(std::unique_ptr<CovarianceFunction> const &k, std::vector<double> theta, const Matrix<double> &X, const Vector<double> &y, double sigma_n, const int k_CV)
{
    std::vector<int> batch_begin;
    std::vector<int> batch_end;
    std::vector<int> batch_size;
    std::tie(batch_begin, batch_end, batch_size) = ngroups(X.size(0), k);

    for(int k = 0; k < k_CV; ++k)
    {
        Matrix<double> Xp((X.size(0)-batch_size[i]), X.size(1));
        
        for()
        {
            
        }
    }
    
    
    
    
    
    
    
    
    
    
    
    
    // setup wrapper functions 
    std::function<double(const Vector<double> &)> fwrap = std::bind(f, std::cref(k), std::cref(X), std::cref(y), std::placeholders::_1);
    std::function<Vector<double>(const Vector<double> &)> dfwrap = std::bind(df, std::cref(k), std::cref(X), std::cref(y), std::placeholders::_1);
    
    // create a single hyperparameters vector (covariance function + Gaussian process noise)
    theta.push_back(sigma_n);
    Vector<double> x(theta);
    
    // minimize
    int info;
    double fx;
//    ConjugateGradient cg;
//    std::tie(info, x, fx) = cg.minimize(fwrap, dfwrap, x);
//    RPROP rp;
//    std::tie(info, x, fx) = rp.minimize(fwrap, dfwrap, x);
    // ******
    
/*
    double eps = 0.01;
    
    double T = -1.0; // determine optimum
    int N_T = 10;
    int N_S = 5;
    
    double vinit = 0.1;
    
    bool silent = false;
    
    std::function<double(const std::vector<double> &)> fwrap2 = std::bind(fwrapv, std::cref(k), std::cref(X), std::cref(y), std::placeholders::_1);
    
    int xtype[x.size()];
    double a[x.size()];
    double b[x.size()];
    double v[x.size()];
    for(auto i = 0; i < x.size(); ++i)
    {
        xtype[i] = 1;
        a[i] = 0.01;
        b[i] = 100.0;
        v[i] = vinit;
    }
    
    a[x.size()-1] = 0.0;
    b[x.size()-1] = 0.1;
    v[x.size()-1] = 1.0e-6;
    
    
    
    std::vector<double> xv = x.std_vector();
    
    sim_anneal(
               xv,
               x.size(),
               xtype,
               a,
               b,
               fwrap2,
               T,
               v,
               eps,
               N_T,
               N_S,
               silent
               );
    
    x = Vector<double>(xv);
*/
    
/*
    double eps = 0.01;
    
    double T = -1.0; // determine optimum
    int N_T = 10;
    int N_S = 5;
    
    double vinit = 0.1;
    
    bool silent = false;
    
    int n = x.size();
    
    std::function<double(const std::vector<double> &)> fwrap2 = std::bind(fwrapv, std::cref(k), std::cref(X), std::cref(y), std::placeholders::_1, std::cref(n));
    
    int xtype[2];
    double a[2];
    double b[2];
    double v[2];
    
    xtype[0] = 1;
    xtype[1] = 1;
    
    a[0] = 0.01;
    b[0] = 100.0;
    v[0] = 0.1;
    
    a[1] = 0.0;
    b[1] = 0.1;
    v[1] = 1.0e-6;
    
    std::vector<double> xv = {x(0), x((n-1))};
    
    sim_anneal(
               xv,
               2,
               xtype,
               a,
               b,
               fwrap2,
               T,
               v,
               eps,
               N_T,
               N_S,
               silent
               );
    
    x = Vector<double>(n, xv[0]);
    x((n-1)) = xv[1];    
*/
    
    int n = x.size();
    
    std::function<double(const Vector<double> &)> fwrap3 = std::bind(fwrapV, std::cref(k), std::cref(X), std::cref(y), std::placeholders::_1, std::cref(n));
    std::function<Vector<double>(const Vector<double> &)> dfwrap3 = std::bind(dfwrapV, std::cref(k), std::cref(X), std::cref(y), std::placeholders::_1, std::cref(n));
    
    Vector<double> tmpx(2);
    tmpx(0) = x(0);
    tmpx(1) = x((n-1));
    
    ConjugateGradient cg;
    std::tie(info, tmpx, fx) = cg.minimize(fwrap3, dfwrap3, tmpx);
    
    x = Vector<double>(n, tmpx(0));
    x((n-1)) = tmpx(1);
    
    
    // ******
    
    // convert and return the components of the (single) hyperparameters vector
    return convert(x);
}

// *****

static double fwrapV(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &x, const int n)
{
    Vector<double> theta(n, x(0));
    theta((n-1)) = x(1);
    
    return f(k, X, y, theta);
}

static Vector<double> dfwrapV(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &x, const int n)
{
    Vector<double> theta(n, x(0));
    theta((n-1)) = x(1);
    
    Vector<double> vdf = df(k, X, y, theta);
    
    Vector<double> ret(2);
    ret(0) = theta(0);
    ret(1) = theta((n-1));
    
    return ret;
}


static double fwrapv(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const std::vector<double> &x, const int n)
{
    Vector<double> theta(n, x[0]);
    theta((n-1)) = x[1];
    
    return f(k, X, y, theta);
}

// *****

static double f(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &x)
{
    std::vector<double> theta;
    double sigma_n;
    std::tie(theta, sigma_n) = convert(x);

    std::unique_ptr<CovarianceFunction> knew = k->clone();
    knew->assign_hyperparameters(theta);
    
    Gaussian_process::GaussianProcess gp(knew);
    
    gp.add_data(X, y, sigma_n);
    
    return -gp.likelihood();
}


static Vector<double> df(std::unique_ptr<CovarianceFunction> const &k, const Matrix<double> &X, const Vector<double> &y, const Vector<double> &x)
{
    std::vector<double> theta;
    double sigma_n;
    std::tie(theta, sigma_n) = convert(x);
    
    std::unique_ptr<CovarianceFunction> knew = k->clone();
    knew->assign_hyperparameters(theta);
    
    Gaussian_process::GaussianProcess gp(knew);
    
    gp.add_data(X, y, sigma_n);

    return -Vector<double>(gp.dlikelihood());
}


std::pair<std::vector<double>,
          double> convert(const Vector<double> &x)
{
    std::vector<double> theta = x.std_vector();
    double sigma_n = theta.back();
    theta.pop_back();
    
    return std::make_pair(theta, sigma_n);
}
